package cn.edu.bjtu.classimpl.parser;

import cn.edu.bjtu.classimpl.documententity.Document;
import cn.edu.bjtu.interfaces.document.IDocument;
import cn.edu.bjtu.interfaces.parser.Parser;

/**
 * Parses one tab-separated record line into a {@link Document}.
 *
 * <p>The document id is the concatenation of the first three columns, the
 * label is derived from the first column, and the content is taken from the
 * ninth column (index 8).
 *
 * @author Alex
 */
public class HiveWechatParser implements Parser{

	/** Separator between the columns of an input line. */
	private static final String FIELD_SEPARATOR = "\t";

	/** Index of the column that holds the document content. */
	private static final int CONTENT_INDEX = 8;

	/** Label used when no label can be extracted from the first column. */
	private static final String UNKNOWN_LABEL = "unknown";

	/**
	 * Converts a single tab-separated line into a document.
	 *
	 * @param line the raw line; may be {@code null}
	 * @return the parsed document, or {@code null} when {@code line} is
	 *         {@code null} or does not contain at least nine columns
	 */
	@Override
	public IDocument parse(String line) {
		if (line == null) {
			return null;
		}
		// String.split drops trailing empty strings, so a row whose content
		// column (and everything after it) is empty also ends up too short.
		String[] parts = line.split(FIELD_SEPARATOR);
		if (parts.length <= CONTENT_INDEX) {
			// Malformed row: signal "no document" the same way as for a null
			// line instead of failing with ArrayIndexOutOfBoundsException.
			return null;
		}
		String id = parts[0] + parts[1] + parts[2];
		String label = extractLabel(parts[0]);
		String content = parts[CONTENT_INDEX];
		return new Document(id, label, content);
	}

	/**
	 * Derives the label from the first column: the text before the first
	 * comma with its leading character removed.
	 *
	 * @param field the first column of the record
	 * @return the extracted label, or {@code "unknown"} when there is no text
	 *         before the first comma
	 */
	private static String extractLabel(String field) {
		int comma = field.indexOf(',');
		String head = comma < 0 ? field : field.substring(0, comma);
		if (head.isEmpty()) {
			// Nothing to strip a leading character from; substring(1) would
			// throw StringIndexOutOfBoundsException here.
			return UNKNOWN_LABEL;
		}
		return head.substring(1);
	}

}
